﻿using System;
using System.Collections;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using WordNetClasses;

namespace WordNetTools
{
    public class XiaobinAlgorithm
    {
        // Path assigned to Wnlib.WNCommon.path before WordNet lookups are performed by this class.
        // NOTE(review): hard-coded, machine-specific location — consider making it configurable.
        string dictPath = @"e:\Program Files\WordNet\2.1\dict\";

        /// <summary>
        /// Runs <see cref="DisambiguateWord"/> on every word of every sentence and stores each
        /// successfully chosen synset in the word's <c>Sense</c>.
        /// </summary>
        /// <param name="text">The text to process, split into sentences of analysed words. It is modified in place.</param>
        /// <returns>The same <paramref name="text"/> instance that was passed in.</returns>
        public List<List<AnalysedWord>> DisambiguateText(List<List<AnalysedWord>> text)
        {
            for (int sentenceNr = 0; sentenceNr < text.Count; sentenceNr++)
            {
                List<AnalysedWord> current = text[sentenceNr];
                int position = 0;
                while (position < current.Count)
                {
                    Wnlib.SynSet chosenSense;
                    bool resolved = DisambiguateWord(current[position].Word, position, current, text, out chosenSense);
                    if (resolved)
                    {
                        current[position].Sense = chosenSense;
                    }
                    position++;
                }
            }
            return text;
        }

        /// <summary>
        /// Tries to choose a WordNet noun sense for the word at <paramref name="wordPosition"/> by running
        /// <see cref="Step1"/>, <see cref="Step2And3"/>, <see cref="Step4"/>, <see cref="Step5And6"/> and
        /// <see cref="Step7"/> in that order and accepting the first sense that a step produces.
        /// </summary>
        /// <remarks>
        /// Only words tagged "NN" or "NNS" that are not already flagged as meaningless are processed.
        /// When the word itself has no senses but one of its morphological forms does, the word stored in
        /// <paramref name="sentence"/> is replaced by that form. On success the word's
        /// <c>Meaningless</c> flag is set to <c>false</c>.
        /// </remarks>
        /// <param name="word">The word to disambiguate.</param>
        /// <param name="wordPosition">Index of the word within <paramref name="sentence"/>.</param>
        /// <param name="sentence">The sentence containing the word.</param>
        /// <param name="text">The whole text (split into sentences) used as context.</param>
        /// <param name="result">The chosen synset, or <c>null</c> when the method returns <c>false</c>.</param>
        /// <returns><c>true</c> when a sense was chosen; otherwise <c>false</c>.</returns>
        public bool DisambiguateWord(string word, int wordPosition, List<AnalysedWord> sentence, List<List<AnalysedWord>> text, out Wnlib.SynSet result)
        {
            result = null;
            AnalysedWord target = sentence[wordPosition];

            // Not a noun: nothing to do.
            if (target.POS != "NN" && target.POS != "NNS")
            {
                return false;
            }

            // Words already flagged as meaningless are skipped.
            if (target.Meaningless.HasValue && target.Meaningless.Value)
            {
                return false;
            }

            Wnlib.WNCommon.path = dictPath;
            Wnlib.PartOfSpeech pos = Wnlib.PartOfSpeech.of("noun");
            Wnlib.Search mySearch = new Wnlib.Search(word, true, pos, new Wnlib.SearchType(false, "OVERVIEW"), 0); //Search Types are defined in util.cs:107

            // Fall back to the first morphological form that has senses.
            //TODO: also verify the morphs
            if (mySearch.senses.Count == 0 && mySearch.morphs.Count > 0)
            {
                foreach (DictionaryEntry morph in mySearch.morphs)
                {
                    Wnlib.Search morphSearch = (Wnlib.Search)morph.Value;
                    if (morphSearch.senses.Count > 0)
                    {
                        string surfaceForm = target.Word;
                        mySearch = morphSearch;
                        target.Word = mySearch.word;

                        // Previously the base form was also written unconditionally to
                        // text[text.Count - 1][wordPosition]. Whenever `sentence` was not the last
                        // sentence of `text` that overwrote an unrelated word, or threw
                        // ArgumentOutOfRangeException when the last sentence was shorter.
                        // If `sentence` is itself an element of `text`, the assignment above has already
                        // updated it. Only a detached copy of the last sentence still needs syncing,
                        // and only when the slot really holds the same surface form.
                        bool sentenceIsInText = text.Any(s => object.ReferenceEquals(s, sentence));
                        if (!sentenceIsInText && text.Count > 0)
                        {
                            List<AnalysedWord> lastSentence = text[text.Count - 1];
                            if (wordPosition < lastSentence.Count && lastSentence[wordPosition].Word == surfaceForm)
                            {
                                lastSentence[wordPosition].Word = mySearch.word;
                            }
                        }
                        break;
                    }
                }
            }

            Wnlib.SynSet resultSense;
            bool disambiguated;

            if (Step1(mySearch, out resultSense))
            {
                // Step 1 is decisive: either the single sense was found or the word is not in WordNet.
                if (resultSense == null)
                {
                    return false;  //no such word in WordNet
                }
                disambiguated = true;
            }
            else
            {
                // A later step counts only when it both reports success and yields a sense.
                disambiguated = Step2And3(mySearch, wordPosition, sentence, text, out resultSense) && resultSense != null;
                if (!disambiguated)
                {
                    disambiguated = Step4(mySearch, wordPosition, sentence, text, out resultSense) && resultSense != null;
                }
                if (!disambiguated)
                {
                    disambiguated = Step5And6(mySearch, wordPosition, sentence, text, out resultSense) && resultSense != null;
                }
                if (!disambiguated)
                {
                    disambiguated = Step7(mySearch, wordPosition, sentence, text, out resultSense) && resultSense != null;
                }
                //step 8 does not apply in my algorithm yet
            }

            if (!disambiguated)
            {
                return false;
            }

            result = resultSense;
            target.Meaningless = false;
            return true;    //word disambiguated
        }

        /// <summary>
        /// Step 1: settles the trivial cases based on how many senses the search found.
        /// </summary>
        /// <param name="word">The completed search for the word being disambiguated.</param>
        /// <param name="mySense">The only sense when exactly one exists; otherwise <c>null</c>.</param>
        /// <returns>
        /// <c>true</c> when no further steps are needed (zero senses, or exactly one sense);
        /// <c>false</c> when the word has several senses and the next step should run.
        /// </returns>
        public bool Step1(Wnlib.Search word, out Wnlib.SynSet mySense)
        {
            switch (word.senses.Count)
            {
                case 0:
                    // no such word in WordNet
                    mySense = null;
                    return true;
                case 1:
                    // word disambiguated
                    mySense = word.senses[0];
                    return true;
                default:
                    // go to the next step
                    mySense = null;
                    return false;
            }
        }

        /// <summary>
        /// Steps 2 and 3: collects the nouns that follow other occurrences of the word's verb context in the
        /// text, then picks the first sense of the word whose "HYPERPTR"/"HYPOPTR" search (step 2) or
        /// "COORDS" search (step 3) yields a lexeme equal to one of those nouns.
        /// </summary>
        /// <param name="word">The word to disambiguate</param>
        /// <param name="wordPosition">The position index of the word within the sentence</param>
        /// <param name="sentence">The sentence (split into a list of words) in which the word occurred</param>
        /// <param name="text">The whole text available for the analysis - the context (split into sentences)</param>
        /// <param name="mySense">The matching sense, or <c>null</c> when the method returns <c>false</c></param>
        /// <returns><c>true</c> when a sense was chosen; <c>false</c> when there is no verb context or no match.</returns>
        public bool Step2And3(Wnlib.Search word, int wordPosition, List<AnalysedWord> sentence, List<List<AnalysedWord>> text, out Wnlib.SynSet mySense)
        {
            mySense = null;

            string verbContext = GetVerbContext(wordPosition, sentence);
            if (verbContext == String.Empty)
            {
                return false;  //no verb context
            }

            // A set gives constant-time membership tests inside the lexeme loops below.
            HashSet<string> similarNouns = new HashSet<string>();

            foreach (List<AnalysedWord> localSentence in text) // for every sentence
            {
                for (int verbIndex = 0; verbIndex < localSentence.Count; verbIndex++)
                {
                    // Only occurrences of the same verb are of interest.
                    if (!IsTheSameWord(verbContext, localSentence[verbIndex].Word, "verb"))
                    {
                        continue;
                    }

                    // Take the first noun standing after that verb in the same sentence.
                    for (int k = verbIndex + 1; k < localSentence.Count; k++)
                    {
                        string tmpNoun = CanBeANoun(localSentence[k]);
                        if (tmpNoun != String.Empty)
                        {
                            if (tmpNoun != word.word)
                            {
                                similarNouns.Add(tmpNoun);
                            }
                            break;
                            //TODO !! add support for several (modifying) nouns, e.g. "pirate ship"
                        }
                    }
                }
            }

            // Without candidate nouns no lexeme can match, so skip the WordNet searches entirely.
            if (similarNouns.Count == 0)
            {
                return false;
            }

            //Step 2
            if (FindSenseWithSimilarNoun(word, similarNouns, new string[] { "HYPERPTR", "HYPOPTR" }, out mySense))
            {
                return true;
            }

            //Step 3
            return FindSenseWithSimilarNoun(word, similarNouns, new string[] { "COORDS" }, out mySense);
        }

        /// <summary>
        /// For each sense of <paramref name="word"/> (in order) runs the given search types (in order) and
        /// returns the first sense for which a resulting lexeme is contained in <paramref name="similarNouns"/>.
        /// </summary>
        private bool FindSenseWithSimilarNoun(Wnlib.Search word, HashSet<string> similarNouns, string[] searchTypes, out Wnlib.SynSet matchedSense)
        {
            for (int senseNr = 0; senseNr < word.senses.Count; senseNr++)
            {
                foreach (string searchType in searchTypes)
                {
                    // Sense numbers passed to the search are 1-based.
                    Wnlib.Search tmpSearch = new Wnlib.Search(word.word, false, word.pos, new Wnlib.SearchType(searchType), senseNr + 1);
                    foreach (DictionaryEntry lexeme in tmpSearch.lexemes)
                    {
                        if (similarNouns.Contains(((Wnlib.Lexeme)lexeme.Key).word))
                        {
                            matchedSense = word.senses[senseNr];
                            return true;
                        }
                    }
                }
            }

            matchedSense = null;
            return false;
        }

        /// <summary>
        /// Step 4: looks for other occurrences of the same word whose sense is already known, pairs each with
        /// the nearest verb standing before it, and reuses the known sense when the target's verb context is
        /// among the lexemes of a "HYPERPTR", "HYPOPTR" or "COORDS" search on that verb.
        /// </summary>
        /// <param name="word">The completed search for the word being disambiguated.</param>
        /// <param name="wordPosition">Index of the word within <paramref name="sentence"/>.</param>
        /// <param name="sentence">The sentence containing the word.</param>
        /// <param name="text">The whole text (split into sentences) used as context.</param>
        /// <param name="mySense">The reused sense, or <c>null</c> when the method returns <c>false</c>.</param>
        /// <returns><c>true</c> when a sense was chosen; <c>false</c> when there is no verb context or no match.</returns>
        public bool Step4(Wnlib.Search word, int wordPosition, List<AnalysedWord> sentence, List<List<AnalysedWord>> text, out Wnlib.SynSet mySense)
        {
            mySense = null;

            string verbContext = GetVerbContext(wordPosition, sentence);
            if (verbContext == String.Empty)
            {
                return false;  //no verb context
            }

            // Parallel lists: candidateVerbs[n] is the verb found in front of sameNouns[n].
            List<AnalysedWord> sameNouns = new List<AnalysedWord>();
            List<string> candidateVerbs = new List<string>();

            foreach (List<AnalysedWord> localSentence in text) // for every sentence
            {
                for (int j = 0; j < localSentence.Count; j++)
                {
                    AnalysedWord localWord = localSentence[j];

                    // Need the same word (assumed to be a noun) with an already known sense.
                    if (localWord.Word != word.word || localWord.Sense == null)
                    {
                        continue;
                    }

                    // Scan backwards for the nearest verb. The bound is k >= 0 so that a verb at the very
                    // start of the sentence is considered too (the previous k > 0 skipped index 0,
                    // unlike GetVerbContext).
                    for (int k = j - 1; k >= 0; k--)
                    {
                        string tmpVerb = CanBeAVerb(localSentence[k]);
                        if (tmpVerb != String.Empty)
                        {
                            sameNouns.Add(localWord);
                            candidateVerbs.Add(tmpVerb);
                            break;
                            //TODO !! add support for several (modifying) nouns, e.g. "pirate ship"
                        }
                    }
                }
            }

            Wnlib.PartOfSpeech pos = Wnlib.PartOfSpeech.of("verb");
            string[] searchTypes = { "HYPERPTR", "HYPOPTR", "COORDS" };

            for (int verbNr = 0; verbNr < candidateVerbs.Count; verbNr++)
            {
                //TODO consider whether the sense of this verb should be known first
                foreach (string searchType in searchTypes)
                {
                    Wnlib.Search tmpSearch = new Wnlib.Search(candidateVerbs[verbNr], true, pos, new Wnlib.SearchType(searchType), 0);

                    // If the verb itself has no senses, use its first morphological form that does.
                    if (tmpSearch.senses.Count == 0 && tmpSearch.morphs.Count > 0)
                    {
                        foreach (DictionaryEntry morph in tmpSearch.morphs)
                        {
                            Wnlib.Search morphSearch = (Wnlib.Search)morph.Value;
                            if (morphSearch.senses.Count > 0)
                            {
                                tmpSearch = morphSearch;
                                break;
                            }
                        }
                    }

                    foreach (DictionaryEntry lexeme in tmpSearch.lexemes)
                    {
                        if (verbContext == ((Wnlib.Lexeme)lexeme.Key).word)
                        {
                            mySense = sameNouns[verbNr].Sense;
                            return true;
                        }
                    }
                }
            }

            return false;
        }

        /// <summary>
        /// Steps 5 and 6: collects (verb, noun) pairs from the text, then picks the first sense of the word
        /// whose "HYPERPTR"/"HYPOPTR" search (step 5) or "COORDS" search (step 6) yields a lexeme equal to
        /// one of the collected nouns whose verb is related (see <see cref="AreVerbsRelated"/>) to the
        /// word's own verb context.
        /// </summary>
        /// <param name="word">The completed search for the word being disambiguated.</param>
        /// <param name="wordPosition">Index of the word within <paramref name="sentence"/>.</param>
        /// <param name="sentence">The sentence containing the word.</param>
        /// <param name="text">The whole text (split into sentences) used as context.</param>
        /// <param name="mySense">The matching sense, or <c>null</c> when the method returns <c>false</c>.</param>
        /// <returns><c>true</c> when a sense was chosen; <c>false</c> when there is no verb context or no match.</returns>
        public bool Step5And6(Wnlib.Search word, int wordPosition, List<AnalysedWord> sentence, List<List<AnalysedWord>> text, out Wnlib.SynSet mySense)
        {
            mySense = null;

            string verbContext = GetVerbContext(wordPosition, sentence);
            if (verbContext == String.Empty)
            {
                return false;  //no verb context
            }

            // Parallel lists: index y describes one noun, its base form, and the verb found in front of it.
            List<AnalysedWord> otherNouns = new List<AnalysedWord>();
            List<string> otherNounBaseForms = new List<string>();
            List<AnalysedWord> otherVerbs = new List<AnalysedWord>();

            foreach (List<AnalysedWord> localSentence in text) // for every sentence
            {
                for (int j = 0; j < localSentence.Count; j++)
                {
                    AnalysedWord localWord = localSentence[j];
                    string noun = CanBeANoun(localWord);
                    if (noun == String.Empty)
                    {
                        continue;   // not a noun
                    }

                    // Scan backwards for the nearest verb. The bound is k >= 0 so that a verb at the very
                    // start of the sentence is considered too (the previous k > 0 skipped index 0,
                    // unlike GetVerbContext).
                    for (int k = j - 1; k >= 0; k--)
                    {
                        string tmpVerb = CanBeAVerb(localSentence[k]);
                        if (tmpVerb != String.Empty)
                        {
                            otherNouns.Add(localWord);
                            otherNounBaseForms.Add(noun);
                            otherVerbs.Add(localSentence[k]);
                            break;
                            //TODO !! add support for several (modifying) nouns, e.g. "pirate ship"
                        }
                    }
                }
            }

            // Without any (verb, noun) pair nothing can match, so skip the WordNet searches entirely.
            if (otherNouns.Count == 0)
            {
                return false;
            }

            Wnlib.PartOfSpeech posNoun = Wnlib.PartOfSpeech.of("noun");

            string[][] searchTypesPerStep =
            {
                new string[] { "HYPERPTR", "HYPOPTR" },   //step 5
                new string[] { "COORDS" }                 //step 6
            };

            foreach (string[] searchTypes in searchTypesPerStep)
            {
                for (int x = 0; x < word.senses.Count; x++) // for every sense of the word
                {
                    foreach (string searchType in searchTypes)
                    {
                        // Sense numbers passed to the search are 1-based.
                        Wnlib.Search tmpSearch = new Wnlib.Search(word.word, false, posNoun, new Wnlib.SearchType(searchType), x + 1);
                        foreach (DictionaryEntry lexeme in tmpSearch.lexemes)
                        {
                            string lexemeWord = ((Wnlib.Lexeme)lexeme.Key).word;
                            if (HasPairWithRelatedVerb(lexemeWord, verbContext, otherNouns, otherNounBaseForms, otherVerbs))
                            {
                                mySense = word.senses[x];
                                return true;
                            }
                        }
                    }
                }
            }

            return false;
        }

        /// <summary>
        /// Checks whether any collected noun equals <paramref name="lexemeWord"/> and has a verb that is
        /// related to <paramref name="verbContext"/>.
        /// </summary>
        private bool HasPairWithRelatedVerb(string lexemeWord, string verbContext, List<AnalysedWord> nouns, List<string> nounBaseForms, List<AnalysedWord> verbs)
        {
            for (int y = 0; y < nouns.Count; y++)
            {
                // Compare the base form as well as the form stored in the text. Previously only the stored
                // form was compared, and the base form returned by CanBeANoun was discarded.
                bool sameNoun = nouns[y].Word == lexemeWord || nounBaseForms[y] == lexemeWord;
                if (sameNoun && AreVerbsRelated(verbContext, verbs[y].Word))
                {
                    return true;
                }
            }
            return false;
        }

        /// <summary>
        /// Step 7: looks for a "such as" construction after the word and picks the first sense of the word
        /// whose "HYPOPTR" search yields a lexeme equal to a noun standing after "such as".
        /// </summary>
        /// <remarks>
        /// The previous implementation computed the noun after "such as" but never used it: it compared
        /// the word with the lexemes of its own "HYPERPTR" search instead. The comparison now uses the
        /// following noun and the "HYPOPTR" search type, which this class uses elsewhere for hyponym lookups.
        /// NOTE(review): how deep the "HYPOPTR" search goes is decided by Wnlib - confirm it covers the
        /// intended hyponym levels.
        /// </remarks>
        /// <param name="word">The completed search for the word being disambiguated.</param>
        /// <param name="wordPosition">Index of the word within <paramref name="sentence"/>.</param>
        /// <param name="sentence">The sentence containing the word.</param>
        /// <param name="text">The whole text (split into sentences); not used by this step.</param>
        /// <param name="mySense">The matching sense, or <c>null</c> when the method returns <c>false</c>.</param>
        /// <returns><c>true</c> when a sense was chosen; otherwise <c>false</c>.</returns>
        public bool Step7(Wnlib.Search word, int wordPosition, List<AnalysedWord> sentence, List<List<AnalysedWord>> text, out Wnlib.SynSet mySense)
        {
            mySense = null;

            Wnlib.PartOfSpeech posNoun = Wnlib.PartOfSpeech.of("noun");
            int senseCount = word.senses.Count;

            // Lexemes of the "HYPOPTR" search for each sense, filled lazily so each sense is searched at most once.
            HashSet<string>[] hyponymsBySense = new HashSet<string>[senseCount];

            for (int i = wordPosition + 1; i < sentence.Count - 2; i++) // -2 because at least 2 words must follow 'such'
            {
                if (sentence[i].Word != "such" || sentence[i + 1].Word != "as")
                {
                    continue;
                }

                for (int j = i + 2; j < sentence.Count; j++)
                {
                    string tmpNoun = CanBeANoun(sentence[j]);

                    // Skip non-nouns, and the word itself, which cannot tell its own senses apart.
                    if (tmpNoun == String.Empty || tmpNoun == word.word)
                    {
                        continue;
                    }

                    for (int x = 0; x < senseCount; x++) // for every sense of the word
                    {
                        if (hyponymsBySense[x] == null)
                        {
                            HashSet<string> hyponyms = new HashSet<string>();
                            // Sense numbers passed to the search are 1-based.
                            Wnlib.Search tmpSearch = new Wnlib.Search(word.word, false, posNoun, new Wnlib.SearchType("HYPOPTR"), x + 1);
                            foreach (DictionaryEntry lexeme in tmpSearch.lexemes)
                            {
                                hyponyms.Add(((Wnlib.Lexeme)lexeme.Key).word);
                            }
                            hyponymsBySense[x] = hyponyms;
                        }

                        if (hyponymsBySense[x].Contains(tmpNoun))
                        {
                            mySense = word.senses[x];
                            return true;
                        }
                    }
                }
            }

            return false;
        }

        /// <summary>
        /// Decides whether two verbs are related: after both are reduced to a form that has senses, they
        /// are related when they are equal or when the second one appears among the lexemes of a
        /// "HYPERPTR", "HYPOPTR" or "COORDS" search on the first one.
        /// </summary>
        /// <param name="verb1">The verb whose related lexemes are searched.</param>
        /// <param name="verb2">The verb looked for among those lexemes.</param>
        /// <returns><c>true</c> when the verbs are equal or related; otherwise <c>false</c>.</returns>
        private bool AreVerbsRelated(string verb1, string verb2)
        {
            Wnlib.PartOfSpeech posVerb = Wnlib.PartOfSpeech.of("verb");

            // make sure verbs are in their normal form
            verb1 = ResolveVerbFormWithSenses(verb1, posVerb);
            verb2 = ResolveVerbFormWithSenses(verb2, posVerb);

            if (verb1 == verb2)
            {
                return true;
            }

            string[] searchTypes = { "HYPERPTR", "HYPOPTR", "COORDS" };
            foreach (string searchType in searchTypes)
            {
                Wnlib.Search related = new Wnlib.Search(verb1, false, posVerb, new Wnlib.SearchType(searchType), 0);
                foreach (DictionaryEntry entry in related.lexemes)
                {
                    if (((Wnlib.Lexeme)entry.Key).word == verb2)
                    {
                        return true;
                    }
                }
            }

            return false;
        }

        /// <summary>
        /// Returns the word of the first morphological form of <paramref name="verb"/> that has senses
        /// when the verb itself has none; otherwise returns <paramref name="verb"/> unchanged.
        /// </summary>
        private string ResolveVerbFormWithSenses(string verb, Wnlib.PartOfSpeech posVerb)
        {
            Wnlib.Search overview = new Wnlib.Search(verb, true, posVerb, new Wnlib.SearchType("OVERVIEW"), 0);
            if (overview.senses.Count == 0 && overview.morphs.Count > 0)
            {
                foreach (DictionaryEntry morph in overview.morphs)
                {
                    Wnlib.Search morphSearch = (Wnlib.Search)morph.Value;
                    if (morphSearch.senses.Count > 0)
                    {
                        return morphSearch.word;
                    }
                }
            }
            return verb;
        }

        /// <summary>
        /// Walks backwards from the word just before the given noun and returns the first word that
        /// <see cref="CanBeAVerb(AnalysedWord)"/> accepts as a verb.
        /// </summary>
        /// <param name="nounPosition">Index of the noun within <paramref name="sentence"/>.</param>
        /// <param name="sentence">The sentence containing the noun.</param>
        /// <returns>The value returned by <c>CanBeAVerb</c> for the nearest preceding verb, or <c>String.Empty</c> when none precedes the noun.</returns>
        private string GetVerbContext(int nounPosition, List<AnalysedWord> sentence)
        {
            int index = nounPosition - 1;
            while (index >= 0)
            {
                string verb = CanBeAVerb(sentence[index]);
                if (verb != String.Empty)
                {
                    return verb;
                }
                index--;
            }
            return String.Empty;
        }

        /// <summary>
        /// Checks whether the given word can be a verb by delegating to <c>CanBeAPOS</c> with the "verb" part of speech.
        /// </summary>
        /// <param name="word">The word to check.</param>
        /// <returns>The result of <c>CanBeAPOS</c>: the word form for which senses were found, or <c>String.Empty</c> when there is none.</returns>
        public string CanBeAVerb(string word)
        {
            return CanBeAPOS(word, "verb");
        }

        /// <summary>
        /// Checks whether the given word can be a noun by delegating to <c>CanBeAPOS</c> with the "noun" part of speech.
        /// </summary>
        /// <param name="word">The word to check.</param>
        /// <returns>The result of <c>CanBeAPOS</c>: the word form for which senses were found, or <c>String.Empty</c> when there is none.</returns>
        public string CanBeANoun(string word)
        {
            return CanBeAPOS(word, "noun");
        }

        /// <summary>
        /// Checks whether an analysed word can be a noun. Only words tagged "NN" or "NNS" are looked up;
        /// every other tag yields <c>String.Empty</c> without a lookup.
        /// </summary>
        /// <param name="word">The analysed word to check.</param>
        /// <returns>The result of <see cref="CanBeANoun(string)"/> for noun-tagged words; otherwise <c>String.Empty</c>.</returns>
        public string CanBeANoun(AnalysedWord word)
        {
            bool taggedAsNoun = word.POS == "NN" || word.POS == "NNS";
            return taggedAsNoun ? CanBeANoun(word.Word) : String.Empty;
        }

        /// <summary>
        /// Checks whether an analysed word can be a verb. Only words tagged "VBN", "VB" or "VBG" are
        /// looked up; every other tag yields <c>String.Empty</c> without a lookup.
        /// </summary>
        /// <param name="word">The analysed word to check.</param>
        /// <returns>The result of <see cref="CanBeAVerb(string)"/> for verb-tagged words; otherwise <c>String.Empty</c>.</returns>
        public string CanBeAVerb(AnalysedWord word)
        {
            // NOTE(review): the tags "VBD", "VBP" and "VBZ" are not accepted here - confirm this is intentional.
            if (word.POS != "VBN" && word.POS != "VB" && word.POS != "VBG")
            {
                return String.Empty;
            }

            return CanBeAVerb(word.Word);
        }

        /// <summary>
        /// Checks whether a candidate word is the same word as a known one: either the strings are equal,
        /// or one of the candidate's morphological forms equals the known word.
        /// </summary>
        /// <param name="myVerb">A known word in its basic form</param>
        /// <param name="candidate">The word to compare</param>
        /// <param name="partOfSpeech">Part of speech name passed to <c>Wnlib.PartOfSpeech.of</c></param>
        /// <returns><c>true</c> when the words are considered the same; otherwise <c>false</c>.</returns>
        private bool IsTheSameWord(string myVerb, string candidate, string partOfSpeech)
        {
            if (myVerb == candidate)
            {
                return true;
            }

            Wnlib.WNCommon.path = dictPath;

            Wnlib.PartOfSpeech pos = Wnlib.PartOfSpeech.of(partOfSpeech);
            Wnlib.Search candidateSearch = new Wnlib.Search(candidate, true, pos, new Wnlib.SearchType(false, "OVERVIEW"), 0);

            bool morphMatches = false;
            foreach (DictionaryEntry morph in candidateSearch.morphs)
            {
                Wnlib.Search morphSearch = (Wnlib.Search)morph.Value;
                if (morphSearch.word == myVerb)
                {
                    morphMatches = true;
                    break;
                }
            }
            return morphMatches;
        }

        /// <summary>
        /// Checks whether the given word can be the given part of speech. When the word itself has senses,
        /// the searched word is returned; otherwise its morphological forms are tried recursively and the
        /// first non-empty result is returned.
        /// </summary>
        /// <param name="word">The word to look up.</param>
        /// <param name="partOfSpeech">Part of speech name passed to <c>Wnlib.PartOfSpeech.of</c>.</param>
        /// <returns>The word form for which senses were found, or <c>String.Empty</c> when there is none.</returns>
        private string CanBeAPOS(string word, string partOfSpeech)
        {
            Wnlib.WNCommon.path = dictPath;

            Wnlib.Search lookup = new Wnlib.Search(word, true, Wnlib.PartOfSpeech.of(partOfSpeech), new Wnlib.SearchType(false, "OVERVIEW"), 0); //Search Types are defined in util.cs:107

            if (lookup.senses.Count > 0)
            {
                return lookup.word;
            }

            // No senses for the word as given: try each morphological form in turn.
            string baseForm = String.Empty;
            foreach (DictionaryEntry morph in lookup.morphs)
            {
                baseForm = CanBeAPOS(((Wnlib.Search)morph.Value).word, partOfSpeech);
                if (baseForm != String.Empty)
                {
                    break;
                }
            }
            return baseForm;
        }
    }
}
